package xyz.syyrjx.blog.util.site;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Sitemap parsing utility.
 *
 * <p>Reads the sitemap XML document located at {@link #siteMapPath} and extracts the text of
 * every {@code <loc>} element that is a direct child of a {@code <url>} element under the
 * document root.
 *
 * @author 大地崩坏苍蝇兽
 * @date 2024/2/20 15:50
 * @description Sitemap parsing utility
 * @since v0.0.1
 */
public class SiteMapUtil {

    /**
     * Location of the sitemap XML document; passed as-is to {@link SAXReader#read(String)}.
     * Must be assigned before {@link #getAllAllowUrl()} is called.
     */
    public static String siteMapPath;


    /**
     * Parses the sitemap at {@link #siteMapPath} and returns the URLs it lists.
     *
     * <p>Only {@code <url>} elements directly under the root and {@code <loc>} elements
     * directly under those are considered. Each value is trimmed of surrounding whitespace
     * (pretty-printed sitemaps often wrap the URL in newlines/indentation) and empty values
     * are skipped.
     *
     * @return a new mutable list of the URLs found, in document order; never {@code null}
     * @throws DocumentException if {@link #siteMapPath} is unset/blank or the document
     *                           cannot be read or parsed
     */
    public static List<String> getAllAllowUrl() throws DocumentException {
        // Snapshot the static field once so the check and the read use the same value.
        String path = siteMapPath;
        if (path == null || path.trim().isEmpty()) {
            throw new DocumentException("siteMapPath is not configured");
        }

        SAXReader reader = new SAXReader();
        // Set the encoding used when reading the file.
        reader.setEncoding("UTF-8");
        // NOTE(review): the reader uses its default parser configuration. If the sitemap can
        // ever come from an untrusted source, DTD / external-entity processing should be
        // disabled to prevent XXE — confirm where siteMapPath points.
        Document document = reader.read(path);
        Element root = document.getRootElement();

        List<String> siteUrlsAllow = new ArrayList<>();
        // Iterator<?> plus a cast avoids a raw type regardless of how the dom4j version in
        // use declares elementIterator().
        Iterator<?> urlNodes = root.elementIterator();
        while (urlNodes.hasNext()) {
            Element urlElement = (Element) urlNodes.next();
            if ("url".equals(urlElement.getName())) {
                collectLocValues(urlElement, siteUrlsAllow);
            }
        }
        return siteUrlsAllow;
    }

    /** Appends the trimmed, non-empty text of each direct {@code <loc>} child to {@code target}. */
    private static void collectLocValues(Element urlElement, List<String> target) {
        Iterator<?> children = urlElement.elementIterator();
        while (children.hasNext()) {
            Element child = (Element) children.next();
            if (!"loc".equals(child.getName())) {
                continue;
            }
            String raw = child.getStringValue();
            if (raw == null) {
                continue;
            }
            String url = raw.trim();
            if (!url.isEmpty()) {
                target.add(url);
            }
        }
    }
}
